Serratia (oligotyping)

Load packages, paths, functions

# Load main packages, paths and custom functions
source("../../../source/main_packages.R")
source("../../../source/paths.R")
source("../../../source/functions.R")

# Load supplementary packages
packages <- c("RColorBrewer", "ggpubr", "cowplot", "Biostrings", "openxlsx", "kableExtra")
invisible(lapply(packages, require, character.only = TRUE))

Preparation

Tables preparation

Seqtab

# move to oligotyping directory
setwd(paste0(path_oligo,"/serratia/oligotyping_Serratia_sequences-c1-s1-a0.0-A0-M10"))

# load the matrix count table
matrix_count <- read.table("MATRIX-COUNT.txt", header = TRUE) %>% t()

# arrange it
colnames(matrix_count) <- matrix_count[1,]
matrix_count <- matrix_count[-1,]
matrix_count <- matrix_count %>% as.data.frame()

# print it
matrix_count %>%
  kbl() %>%
  kable_paper("hover", full_width = F)
CTC1 CTC10 CTC11 CTC12 CTC13 CTC14 CTC15 CTC2 CTC3 CTC4 CTC5 CTC6 CTC7 CTC9 NP41 S104 S25 S27 S30 S32 S36 S37 S38 S39 S44 S48 S49 S51 S77
A 10 24 10 7 442 3 10 3 4 8 0 26 2 2 0 4 3 2 1200 965 55 2132 46 666 17 2 43 13 314
G 4 6 1 0 207 0 7 2 2 5 1 5 2 1 40 0 5 0 655 298 36 874 7 306 6 0 23 5 0

Taxonomy

# move to oligotyping directory
setwd(paste0(path_oligo,"/serratia/oligotyping_Serratia_sequences-c1-s1-a0.0-A0-M10"))

# load the fasta table
fasta <- readDNAStringSet("OLIGO-REPRESENTATIVES.fasta")

# arrange it
fasta <- fasta %>% as.data.frame()
colnames(fasta) <- "seq"
fasta$oligotype <- rownames(fasta)
fasta <- fasta %>% dplyr::select(-c(seq))

# print it
fasta %>%
  kbl() %>%
  kable_paper("hover", full_width = F)
oligotype
A A
G G

Change oligotype name by oligotype / MED nodes in the matrix count

# Reference file 

## move to tsv directory
setwd(path_tsv)

## load the reference table
ref_oligo_med2 <- read.table("2B_REF_info_serratia.tsv", sep="\t", header = TRUE)

## select only the 2 oligotypes of Serratia
ref_oligo_med2 <- ref_oligo_med2[!is.na(ref_oligo_med2$oligotype),]

## change order of columns
ref_oligo_med2 <- ref_oligo_med2 %>% select(c(seq, oligotype, MED_node_frequency_size, OLIGO_oligotype_frequency_size))

## create a column with reference name (will be used in plots)
ref_oligo_med2$ref <- paste0("oligotype_", ref_oligo_med2$OLIGO_oligotype_frequency_size, " / node_", ref_oligo_med2$MED_node_frequency_size)

## create a copy of fasta 
fasta2 <- fasta

# Matrix count

## create an oligotype column in the matrix count
matrix_count$oligotype <- rownames(matrix_count)

## change order of columns
matrix_count <- matrix_count %>% dplyr::select(c(oligotype, everything()))

## merge the matrix count and the reference dataframe
matrix_count2 <- matrix_count %>% merge(ref_oligo_med2 %>% dplyr::select(-c(seq)), by="oligotype")

## change order of columns
matrix_count2 <- matrix_count2 %>% dplyr::select(c(oligotype, MED_node_frequency_size, OLIGO_oligotype_frequency_size, ref, everything()))

## change rownames
rownames(matrix_count2) <- matrix_count2$ref

## change order of columns
matrix_count2 <- matrix_count2 %>% dplyr::select(-c(oligotype, ref, MED_node_frequency_size, OLIGO_oligotype_frequency_size))

## print it
matrix_count2 %>%
  kbl() %>%
  kable_paper("hover", full_width = F)
CTC1 CTC10 CTC11 CTC12 CTC13 CTC14 CTC15 CTC2 CTC3 CTC4 CTC5 CTC6 CTC7 CTC9 NP41 S104 S25 S27 S30 S32 S36 S37 S38 S39 S44 S48 S49 S51 S77
oligotype_A (27) | size:6013 / node_N1114 (27) | size:6013 10 24 10 7 442 3 10 3 4 8 0 26 2 2 0 4 3 2 1200 965 55 2132 46 666 17 2 43 13 314
oligotype_G (23) | size:2498 / node_N1116 (23) | size:2498 4 6 1 0 207 0 7 2 2 5 1 5 2 1 40 0 5 0 655 298 36 874 7 306 6 0 23 5 0
## edit the fasta dataframe
fasta2 <- fasta2 %>% merge(ref_oligo_med2 %>% dplyr::select(-c(seq)), by="oligotype")
rownames(fasta2) <- fasta2$ref
fasta2 <- fasta2 %>% dplyr::select(-c(MED_node_frequency_size, OLIGO_oligotype_frequency_size, oligotype))

## print it
fasta2 %>%
  kbl() %>%
  kable_paper("hover", full_width = F)
ref
oligotype_A (27) | size:6013 / node_N1114 (27) | size:6013 oligotype_A (27) | size:6013 / node_N1114 (27) | size:6013
oligotype_G (23) | size:2498 / node_N1116 (23) | size:2498 oligotype_G (23) | size:2498 / node_N1116 (23) | size:2498

Metadata

metadata <- read.csv(paste0(path_metadata,"/metadata_08_02_2021.csv"), sep=";")
rownames(metadata) <- metadata$Sample

Phyloseq object with oligotypes

# convert matrix_count into matrix and numeric
matrix_count <- matrix_count2 %>% as.matrix()
class(matrix_count) <- "numeric"

# phyloseq elements
OTU = otu_table(as.matrix(matrix_count), taxa_are_rows =TRUE)
TAX = tax_table(as.matrix(fasta2))
SAM = sample_data(metadata)

# phyloseq object
ps <- phyloseq(OTU, TAX, SAM)
ps
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 2 taxa and 29 samples ]
## sample_data() Sample Data:       [ 29 samples by 15 sample variables ]
## tax_table()   Taxonomy Table:    [ 2 taxa by 1 taxonomic ranks ]
compute_read_counts(ps)
## [1] 8511
# remove blanks
ps <- subset_samples(ps, Location!="Blank")
ps <- check_ps(ps)
ps
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 2 taxa and 28 samples ]
## sample_data() Sample Data:       [ 28 samples by 15 sample variables ]
## tax_table()   Taxonomy Table:    [ 2 taxa by 1 taxonomic ranks ]

Create new metadata with Percent

Load ps with all samples (for final plot)

setwd(path_rdata)
ps.filter <- readRDS("1D_MED_phyloseq_decontam.rds")
ps.filter <- check_ps(ps.filter)

Edit new metadata with Percent_serratia

guide_italics <- guides(fill = guide_legend(label.theme = element_text(size = 16, face = "italic", colour = "Black", angle = 0)))

# add read depth in sample table of phyloseq object
sample_data(ps.filter)$Read_depth <- sample_sums(ps.filter)

# select Wolbachia
ps.serratia <- ps.filter %>% subset_taxa(Genus=="Serratia")

# add read depth of Wolbachia
sample_data(ps.filter)$Read_serratia <- sample_sums(ps.serratia)
sample_data(ps.filter) %>% colnames()
##  [1] "Sample"        "Well"          "Primer1"       "Primer2"      
##  [5] "Location"      "Field"         "Country"       "Organ"        
##  [9] "Species"       "Individual"    "Individuals"   "Date"         
## [13] "Run"           "Control"       "Dna"           "Read_depth"   
## [17] "Read_serratia"
sample_data(ps.serratia) %>% colnames()
##  [1] "Sample"      "Well"        "Primer1"     "Primer2"     "Location"   
##  [6] "Field"       "Country"     "Organ"       "Species"     "Individual" 
## [11] "Individuals" "Date"        "Run"         "Control"     "Dna"        
## [16] "Read_depth"
# add percent of Wolbachia
sample_data(ps.filter)$Percent_serratia <- sample_data(ps.filter)$Read_serratia / sample_data(ps.filter)$Read_depth

# round the percent of Wolbachia at 2 decimals
sample_data(ps.filter)$Percent_serratia <- sample_data(ps.filter)$Percent_serratia %>% round(2)

# extract metadata table
test <- data.frame(sample_data(ps.filter))

# merge this metadata table with the other
new.metadata <- data.frame(sample_data(ps)) %>% merge(test %>% dplyr::select(c(Sample, Read_depth, Read_serratia, Percent_serratia)), by="Sample")
new.metadata <- test[new.metadata$Sample %in% sample_names(ps),]
rownames(new.metadata) <- new.metadata$Sample

# print it
new.metadata %>%
  kbl() %>%
  kable_paper("hover", full_width = F)
Sample Well Primer1 Primer2 Location Field Country Organ Species Individual Individuals Date Run Control Dna Read_depth Read_serratia Percent_serratia
CTC1 CTC1 G5 V4-SA707 V3-SA505 Wolbachia - Lab France Whole Culex quinquefasciatus N/A 0 09/03/2018 run2 True sample 58 29779 14 0.00
CTC10 CTC10 D6 V4-SA704 V3-SA506 Wolbachia - Lab France Whole Culex quinquefasciatus N/A 0 09/03/2018 run2 True sample 37,9 2609 30 0.01
CTC11 CTC11 E6 V4-SA705 V3-SA506 Wolbachia - Lab France Whole Culex quinquefasciatus N/A 0 09/03/2018 run2 True sample 58 13874 11 0.00
CTC12 CTC12 F6 V4-SA706 V3-SA506 Wolbachia - Lab France Whole Culex quinquefasciatus N/A 0 09/03/2018 run2 True sample 40,1 1146 7 0.01
CTC13 CTC13 G6 V4-SA707 V3-SA506 Wolbachia - Lab France Whole Culex quinquefasciatus N/A 0 09/03/2018 run2 True sample 58 18035 649 0.04
CTC14 CTC14 H6 V4-SA708 V3-SA506 Wolbachia - Lab France Whole Culex quinquefasciatus N/A 0 09/03/2018 run2 True sample 6,17 1708 3 0.00
CTC15 CTC15 I6 V4-SA709 V3-SA506 Wolbachia - Lab France Whole Culex quinquefasciatus N/A 0 09/03/2018 run2 True sample 58 23180 17 0.00
CTC2 CTC2 H5 V4-SA708 V3-SA505 Wolbachia - Lab France Whole Culex quinquefasciatus N/A 0 09/03/2018 run2 True sample 58 30692 5 0.00
CTC3 CTC3 I5 V4-SA709 V3-SA505 Wolbachia - Lab France Whole Culex quinquefasciatus N/A 0 09/03/2018 run2 True sample 58 39920 6 0.00
CTC4 CTC4 J5 V4-SA710 V3-SA505 Wolbachia - Lab France Whole Culex quinquefasciatus N/A 0 09/03/2018 run2 True sample 1,15 2139 13 0.01
CTC5 CTC5 K5 V4-SA711 V3-SA505 Wolbachia - Lab France Whole Culex quinquefasciatus N/A 0 09/03/2018 run2 True sample 58 15789 1 0.00
CTC6 CTC6 L5 V4-SA712 V3-SA505 Wolbachia - Lab France Whole Culex quinquefasciatus N/A 0 09/03/2018 run2 True sample 58 19753 31 0.00
CTC9 CTC9 C6 V4-SA703 V3-SA506 Wolbachia - Lab France Whole Culex quinquefasciatus N/A 0 09/03/2018 run2 True sample 33,6 4980 3 0.00
NP14 NP14 K4 V4-SA711 V3-SA504 Guadeloupe Field Guadeloupe Ovary Aedes aegypti 1a 0 N/A run3 True sample 0,437 7973 0 0.00
NP2 NP2 K3 V4-SA711 V3-SA503 Guadeloupe Field Guadeloupe Ovary Culex quinquefasciatus 1c 0 N/A run3 True sample 41,6 648335 0 0.00
NP20 NP20 E5 V4-SA705 V3-SA505 Guadeloupe Field Guadeloupe Ovary Aedes aegypti 3a 0 N/A run3 True sample 0,357 136 0 0.00
NP27 NP27 L5 V4-SA712 V3-SA505 Guadeloupe Field Guadeloupe Whole Culex quinquefasciatus 7c 0 N/A run3 True sample 1,16 1234 0 0.00
NP29 NP29 B6 V4-SA702 V3-SA506 Guadeloupe Field Guadeloupe Whole Culex quinquefasciatus 9c 0 N/A run3 True sample 0,314 203 0 0.00
NP30 NP30 C6 V4-SA703 V3-SA506 Guadeloupe Field Guadeloupe Whole Culex quinquefasciatus 10c 0 N/A run3 True sample 0,666 228 0 0.00
NP34 NP34 G6 V4-SA707 V3-SA506 Guadeloupe Field Guadeloupe Whole Culex quinquefasciatus 14c 0 N/A run3 True sample 0,486 95 0 0.00
NP35 NP35 H6 V4-SA708 V3-SA506 Guadeloupe Field Guadeloupe Whole Aedes aegypti 7a 0 N/A run3 True sample 4,64 196532 0 0.00
NP36 NP36 I6 V4-SA709 V3-SA506 Guadeloupe Field Guadeloupe Whole Aedes aegypti 8a 0 N/A run3 True sample 1,06 249 0 0.00
NP37 NP37 J6 V4-SA710 V3-SA506 Guadeloupe Field Guadeloupe Whole Aedes aegypti 9a 0 N/A run3 True sample 22,7 419340 0 0.00
NP38 NP38 K6 V4-SA711 V3-SA506 Guadeloupe Field Guadeloupe Whole Aedes aegypti 10a 0 N/A run3 True sample 3,88 282479 0 0.00
NP39 NP39 L6 V4-SA712 V3-SA506 Guadeloupe Field Guadeloupe Whole Aedes aegypti 11a 0 N/A run3 True sample 20,2 218684 0 0.00
NP41 NP41 B7 V4-SA702 V3-SA507 Guadeloupe Field Guadeloupe Whole Aedes aegypti 13a 0 N/A run3 True sample 5,32 247152 40 0.00
NP42 NP42 C7 V4-SA703 V3-SA507 Guadeloupe Field Guadeloupe Whole Aedes aegypti 14a 0 N/A run3 True sample 4,65 185157 0 0.00
NP43 NP43 D7 V4-SA704 V3-SA507 Guadeloupe Field Guadeloupe Whole Aedes aegypti 15a 0 N/A run3 True sample 6,89 239335 0 0.00
NP44 NP44 E7 V4-SA705 V3-SA507 Guadeloupe Field Guadeloupe Whole Aedes aegypti 16a 0 N/A run3 True sample 21,7 156879 0 0.00
NP5 NP5 B4 V4-SA702 V3-SA504 Guadeloupe Field Guadeloupe Ovary Culex quinquefasciatus 2c 0 N/A run3 True sample 33,5 736159 0 0.00
NP8 NP8 E4 V4-SA705 V3-SA504 Guadeloupe Field Guadeloupe Ovary Culex quinquefasciatus 3c 0 N/A run3 True sample 46 334799 0 0.00
S100 S100 K7 V4-SA711 V3-SA507 Camping Europe Field France Ovary Culex pipiens GL1 1 30/05/2017 run1 True sample 8,02 52486 0 0.00
S102 S102 A8 V4-SA701 V3-SA508 Camping Europe Field France Ovary Culex pipiens GL2 2 30/05/2017 run1 True sample 0,241 3456 0 0.00
S104 S104 C8 V4-SA703 V3-SA508 Camping Europe Field France Ovary Culex pipiens GL5 5 30/05/2017 run1 True sample 24,1 52403 4 0.00
S105 S105 D8 V4-SA704 V3-SA508 Camping Europe Field France Ovary Culex pipiens GL6 6 30/05/2017 run1 True sample 6,83 55577 0 0.00
S106 S106 E8 V4-SA705 V3-SA508 Camping Europe Field France Ovary Culex pipiens GL7 7 30/05/2017 run1 True sample 51 33053 0 0.00
S107 S107 F8 V4-SA706 V3-SA508 Camping Europe Field France Ovary Culex pipiens GL8 8 30/05/2017 run1 True sample 32,6 52154 0 0.00
S108 S108 G8 V4-SA707 V3-SA508 Camping Europe Field France Ovary Culex pipiens GL9 9 30/05/2017 run1 True sample 32,2 55735 0 0.00
S109 S109 H8 V4-SA708 V3-SA508 Camping Europe Field France Ovary Culex pipiens GL10 10 30/05/2017 run1 True sample 26,5 59023 0 0.00
S110 S110 I8 V4-SA709 V3-SA508 Camping Europe Field France Ovary Culex pipiens GL11 0 30/05/2017 run1 True sample 27,5 57377 0 0.00
S121 S121 H1 V4-SA708 V3-SA501 Bosc Field France Ovary Culex pipiens J26 22 28/06/2017 run2 True sample 12,7 20361 0 0.00
S122 S122 I1 V4-SA709 V3-SA501 Bosc Field France Ovary Culex pipiens J27 23 28/06/2017 run2 True sample 22,7 9803 0 0.00
S123 S123 J1 V4-SA710 V3-SA501 Bosc Field France Ovary Culex pipiens J28 24 28/06/2017 run2 True sample 6,41 20130 0 0.00
S124 S124 K1 V4-SA711 V3-SA501 Bosc Field France Ovary Culex pipiens J29 25 28/06/2017 run2 True sample 33,9 18146 0 0.00
S126 S126 K6 V4-SA711 V3-SA506 Bosc Field France Ovary Culex pipiens J30 26 28/06/2017 run2 True sample 58 15235 0 0.00
S127 S127 B2 V4-SA702 V3-SA502 Bosc Field France Ovary Culex pipiens J31 27 28/06/2017 run2 True sample 12,8 24696 0 0.00
S128 S128 C2 V4-SA703 V3-SA502 Bosc Field France Ovary Culex pipiens J32 28 28/06/2017 run2 True sample 35,1 16305 0 0.00
S146 S146 I3 V4-SA709 V3-SA503 Lavar (labo) Lab France Ovary Culex pipiens MW52 29 29/08/2017 run2 True sample 35,2 25012 0 0.00
S147 S147 J3 V4-SA710 V3-SA503 Lavar (labo) Lab France Ovary Culex pipiens MW53 30 29/08/2017 run2 True sample 27,1 25171 0 0.00
S148 S148 K3 V4-SA711 V3-SA503 Lavar (labo) Lab France Ovary Culex pipiens MW54 31 29/08/2017 run2 True sample 43,2 14164 0 0.00
S150 S150 A4 V4-SA701 V3-SA504 Lavar (labo) Lab France Ovary Culex pipiens MW55 32 29/08/2017 run2 True sample 2,3 15081 0 0.00
S151 S151 B4 V4-SA702 V3-SA504 Lavar (labo) Lab France Ovary Culex pipiens MW56 33 29/08/2017 run2 True sample 38,8 22944 0 0.00
S152 S152 C4 V4-SA703 V3-SA504 Lavar (labo) Lab France Ovary Culex pipiens MW57 34 29/08/2017 run2 True sample 39,8 15082 0 0.00
S153 S153 D4 V4-SA704 V3-SA504 Lavar (labo) Lab France Ovary Culex pipiens MW58 35 29/08/2017 run2 True sample 52 17040 0 0.00
S154 S154 E4 V4-SA705 V3-SA504 Lavar (labo) Lab France Ovary Culex pipiens MW59 36 29/08/2017 run2 True sample 37,7 9626 0 0.00
S160 S160 K4 V4-SA711 V3-SA504 Lavar (labo) Lab France Ovary Culex pipiens MW60 37 29/08/2017 run2 True sample 58 72508 0 0.00
S162 S162 B5 V4-SA702 V3-SA505 Lavar (labo) Lab France Ovary Culex pipiens MW61 38 29/08/2017 run2 True sample 42 25180 0 0.00
S163 S163 L6 V4-SA712 V3-SA506 Lavar (labo) Lab France Ovary Culex pipiens MW62 39 30/08/2017 run2 True sample 51 12333 0 0.00
S164 S164 C5 V4-SA703 V3-SA505 Lavar (labo) Lab France Ovary Culex pipiens MW63 40 30/08/2017 run2 True sample 36,6 22368 0 0.00
S165 S165 D5 V4-SA704 V3-SA505 Lavar (labo) Lab France Ovary Culex pipiens MW64 41 30/08/2017 run2 True sample 53 17731 0 0.00
S166 S166 E5 V4-SA705 V3-SA505 Camping Europe Field France Ovary Culex pipiens GL4 4 30/05/2017 run2 True sample 23,1 13979 0 0.00
S167 S167 F5 V4-SA706 V3-SA505 Bosc Field France Ovary Culex pipiens J32 28 28/06/2017 run2 True sample 29,1 14048 0 0.00
S169 S169 B7 V4-SA702 V3-SA507 Camping Europe Field France Ovary Culex pipiens 5 43 16/05/2017 run2 True sample 5,84 11553 0 0.00
S170 S170 C7 V4-SA703 V3-SA507 Camping Europe Field France Ovary Culex pipiens 6 44 16/05/2017 run2 True sample 5,55 8852 0 0.00
S18 S18 A1 V4-SA701 V3-SA501 Lavar (labo) Lab France Whole Culex pipiens MW75 0 30/08/2017 run1 True sample 0,089 4290 0 0.00
S19 S19 B1 V4-SA702 V3-SA501 Lavar (labo) Lab France Whole Culex pipiens MW65 0 30/08/2017 run1 True sample 21,9 44527 0 0.00
S20 S20 C1 V4-SA703 V3-SA501 Lavar (labo) Lab France Whole Culex pipiens MW66 0 30/08/2017 run1 True sample 16,6 42864 0 0.00
S21 S21 D1 V4-SA704 V3-SA501 Lavar (labo) Lab France Whole Culex pipiens MW67 0 30/08/2017 run1 True sample 12,4 33798 0 0.00
S22 S22 E1 V4-SA705 V3-SA501 Lavar (labo) Lab France Whole Culex pipiens MW68 0 30/08/2017 run1 True sample 24,1 19044 0 0.00
S23 S23 F1 V4-SA706 V3-SA501 Lavar (labo) Lab France Whole Culex pipiens MW69 0 30/08/2017 run1 True sample 20,8 38172 0 0.00
S24 S24 G1 V4-SA707 V3-SA501 Lavar (labo) Lab France Whole Culex pipiens MW70 0 30/08/2017 run1 True sample 34,2 42355 0 0.00
S25 S25 H1 V4-SA708 V3-SA501 Lavar (labo) Lab France Whole Culex pipiens MW71 0 30/08/2017 run1 True sample 21,9 47688 8 0.00
S26 S26 I1 V4-SA709 V3-SA501 Lavar (labo) Lab France Whole Culex pipiens MW72 0 30/08/2017 run1 True sample 0,322 5394 0 0.00
S27 S27 J1 V4-SA710 V3-SA501 Lavar (labo) Lab France Whole Culex pipiens MW73 0 30/08/2017 run1 True sample 11,3 24558 2 0.00
S28 S28 A2 V4-SA701 V3-SA502 Lavar (labo) Lab France Whole Culex pipiens MW74 0 30/08/2017 run1 True sample 0,112 4503 0 0.00
S30 S30 K1 V4-SA711 V3-SA501 Lavar (labo) Lab France Whole Culex pipiens MW1 0 23/08/2017 run1 True sample 4,43 25353 1855 0.07
S31 S31 L1 V4-SA712 V3-SA501 Lavar (labo) Lab France Whole Culex pipiens MW2 0 23/08/2017 run1 True sample 2,66 20417 0 0.00
S32 S32 C2 V4-SA703 V3-SA502 Lavar (labo) Lab France Whole Culex pipiens MW3 0 23/08/2017 run1 True sample 0,504 12441 1263 0.10
S33 S33 D2 V4-SA704 V3-SA502 Lavar (labo) Lab France Whole Culex pipiens MW4 0 23/08/2017 run1 True sample 0,782 33867 0 0.00
S34 S34 E2 V4-SA705 V3-SA502 Lavar (labo) Lab France Whole Culex pipiens MW5 0 23/08/2017 run1 True sample 1,38 9367 0 0.00
S35 S35 F2 V4-SA706 V3-SA502 Lavar (labo) Lab France Whole Culex pipiens MW6 0 23/08/2017 run1 True sample 0,56 11663 0 0.00
S36 S36 G2 V4-SA707 V3-SA502 Lavar (labo) Lab France Whole Culex pipiens MW7 0 23/08/2017 run1 True sample 39,8 33020 91 0.00
S37 S37 H2 V4-SA708 V3-SA502 Lavar (labo) Lab France Whole Culex pipiens MW8 0 23/08/2017 run1 True sample 41,3 18340 3006 0.16
S38 S38 I2 V4-SA709 V3-SA502 Lavar (labo) Lab France Whole Culex pipiens MW9 0 23/08/2017 run1 True sample 32,1 54790 53 0.00
S39 S39 J2 V4-SA710 V3-SA502 Lavar (labo) Lab France Whole Culex pipiens MW10 0 23/08/2017 run1 True sample 37,3 36273 972 0.03
S40 S40 K2 V4-SA711 V3-SA502 Lavar (labo) Lab France Whole Culex pipiens MW11 0 23/08/2017 run1 True sample 4,58 44448 0 0.00
S42 S42 A3 V4-SA701 V3-SA503 Camping Europe Field France Whole Culex pipiens GLE1 0 30/05/2017 run1 True sample 0 4107 0 0.00
S43 S43 B3 V4-SA702 V3-SA503 Camping Europe Field France Whole Culex pipiens GLE2 0 30/05/2017 run1 True sample 0,191 9279 0 0.00
S44 S44 C3 V4-SA703 V3-SA503 Camping Europe Field France Whole Culex pipiens GLE3 0 30/05/2017 run1 True sample 0,102 8026 23 0.00
S45 S45 D3 V4-SA704 V3-SA503 Camping Europe Field France Whole Culex pipiens GLE4 0 30/05/2017 run1 True sample 0,223 18150 0 0.00
S47 S47 F3 V4-SA706 V3-SA503 Camping Europe Field France Whole Culex pipiens GLE6 0 30/05/2017 run1 True sample 0,291 1951 0 0.00
S48 S48 G3 V4-SA707 V3-SA503 Camping Europe Field France Whole Culex pipiens GLE7 0 30/05/2017 run1 True sample 3,44 56738 2 0.00
S49 S49 H3 V4-SA708 V3-SA503 Bosc Field France Whole Culex pipiens E1 0 28/06/2017 run1 True sample 1,1 33498 66 0.00
S50 S50 I3 V4-SA709 V3-SA503 Bosc Field France Whole Culex pipiens E2 0 28/06/2017 run1 True sample 0,771 28481 0 0.00
S51 S51 J3 V4-SA710 V3-SA503 Bosc Field France Whole Culex pipiens E3 0 28/06/2017 run1 True sample 17,8 61788 18 0.00
S52 S52 K3 V4-SA711 V3-SA503 Bosc Field France Whole Culex pipiens E4 0 28/06/2017 run1 True sample 0,495 21553 0 0.00
S55 S55 B4 V4-SA702 V3-SA504 Bosc Field France Whole Culex pipiens E6 0 28/06/2017 run1 True sample 2,85 50447 0 0.00
S56 S56 C4 V4-SA703 V3-SA504 Bosc Field France Whole Culex pipiens E7 0 28/06/2017 run1 True sample 3,6 42609 0 0.00
S57 S57 D4 V4-SA704 V3-SA504 Bosc Field France Whole Culex pipiens E8 0 28/06/2017 run1 True sample 4,92 49157 0 0.00
S58 S58 E4 V4-SA705 V3-SA504 Bosc Field France Whole Culex pipiens E9 0 28/06/2017 run1 True sample 1,63 30357 0 0.00
S59 S59 F4 V4-SA706 V3-SA504 Bosc Field France Whole Culex pipiens E10 0 28/06/2017 run1 True sample 1,64 32798 0 0.00
S60 S60 G4 V4-SA707 V3-SA504 Bosc Field France Whole Culex pipiens E11 0 28/06/2017 run1 True sample 2,7 44485 0 0.00
S61 S61 H4 V4-SA708 V3-SA504 Bosc Field France Whole Culex pipiens E12 0 28/06/2017 run1 True sample 2 49545 0 0.00
S63 S63 J4 V4-SA710 V3-SA504 Bosc Field France Whole Culex pipiens E14 0 28/06/2017 run1 True sample 6,13 53444 0 0.00
S64 S64 K4 V4-SA711 V3-SA504 Bosc Field France Whole Culex pipiens E15 0 28/06/2017 run1 True sample 4,15 47628 0 0.00
S79 S79 B6 V4-SA702 V3-SA506 Camping Europe Field France Ovary Culex pipiens J16 12 28/06/2017 run1 True sample 33,8 59755 0 0.00
S80 S80 C6 V4-SA703 V3-SA506 Camping Europe Field France Ovary Culex pipiens J17 13 28/06/2017 run1 True sample 4,58 52788 0 0.00
S83 S83 F6 V4-SA706 V3-SA506 Camping Europe Field France Ovary Culex pipiens J20 16 28/06/2017 run1 True sample 35,5 42272 0 0.00
S84 S84 G6 V4-SA707 V3-SA506 Camping Europe Field France Ovary Culex pipiens J21 17 28/06/2017 run1 True sample 21 56676 0 0.00
S85 S85 H6 V4-SA708 V3-SA506 Camping Europe Field France Ovary Culex pipiens J22 18 28/06/2017 run1 True sample 11,6 41690 0 0.00
S86 S86 I6 V4-SA709 V3-SA506 Camping Europe Field France Ovary Culex pipiens J23 19 28/06/2017 run1 True sample 4,14 61984 0 0.00
S87 S87 J6 V4-SA710 V3-SA506 Bosc Field France Ovary Culex pipiens J24 20 28/06/2017 run1 True sample 28,1 65958 0 0.00
S88 S88 K6 V4-SA711 V3-SA506 Bosc Field France Ovary Culex pipiens J25 21 28/06/2017 run1 True sample 8,6 53102 0 0.00
# replace metadata in the created phyloseq object
sample_data(ps) <- sample_data(new.metadata)

Taxonomic structure

Count

col <- brewer.pal(7, "Pastel2")

# reshape data for plot
test3 <- test %>% select(c(Sample, Species, Location, Organ, Read_depth, Read_serratia)) %>% reshape2::melt(id.vars=c("Sample", "Species", "Location", "Organ"), vars=c("Read_depth", "Read_serratia"))

count_whole <- test3[test3$Organ=="Whole",]
count_ovary <- test3[test3$Organ=="Ovary",]

make.italic <- function(x) as.expression(lapply(x, function(y) bquote(italic(.(y)))))

levels(count_whole$Species)= c("Aedes aegypti"=make.italic("Aedes aegypti"),
               "Culex pipiens"=make.italic("Culex pipiens"),
               "Culex quinquefasciatus"=make.italic("Culex quinquefasciatus"))

levels(count_ovary$Species)= c("Aedes aegypti"=make.italic("Aedes aegypti"),
               "Culex pipiens"=make.italic("Culex pipiens"),
               "Culex quinquefasciatus"=make.italic("Culex quinquefasciatus"))

levels(count_whole$Location) <- c("Bosc", "Camping~Europe", "Guadeloupe", "Lavar~(lab)", expression(paste(italic("Wolbachia"), "- (Slab TC)")))

levels(count_ovary$Location) <- c("Bosc", "Camping~Europe", "Guadeloupe", "Lavar~(lab)", expression(paste(italic("Wolbachia"), "- (Slab TC)")))


# plot
p_count1 <- ggplot(count_whole, aes(x = Sample, y = value, fill=variable))+ 
  geom_bar(position = "dodge", stat = "identity")+
  scale_fill_manual(values = col)+
  theme_bw() +
  theme(axis.text.x = element_text(angle = 90, size=12, hjust=1, vjust=0.5)) +
  ggtitle("") + 
  guide_italics+
  theme(legend.title = element_text(size = 20), 
        legend.position="bottom",
        legend.text=element_text(size=14), 
        panel.spacing.y=unit(1, "lines"), 
        panel.spacing.x=unit(0.8, "lines"),
        panel.spacing=unit(0,"lines"),
        strip.background=element_rect(color="grey30", fill="grey90"),
        strip.text.x = element_text(size = 16),
        panel.border=element_rect(color="grey90"),
        axis.ticks.x=element_blank(),
        axis.text.y = element_text(size=18)) +
  facet_wrap(~Species+Location+Organ, scales = "free_x", ncol=3, labeller=label_parsed)+
  labs(y="Sequence counts")+
  ylim(0, 900000)+
  geom_text(aes(label=value), position=position_dodge(width=1.1), width=0.25, size=4, hjust=-0.25, vjust=0.5, angle=90)+
  guides(fill=guide_legend(title="Read"))
## Warning: Ignoring unknown parameters: width
p_count2 <- ggplot(count_ovary, aes(x = Sample, y = value, fill=variable))+ 
  geom_bar(position = "dodge", stat = "identity")+
    scale_fill_manual(values = col)+
  theme_bw() +
  theme(axis.text.x = element_text(angle = 90, size=18, hjust=1, vjust=0.5)) +
  ggtitle("") + 
  guide_italics+
  theme(legend.title = element_text(size = 20), 
        legend.position="bottom",
        legend.text=element_text(size=14), 
        panel.spacing.y=unit(1, "lines"), 
        panel.spacing.x=unit(0.8, "lines"),
        panel.spacing=unit(0,"lines"),
        strip.background=element_rect(color="grey30", fill="grey90"),
        strip.text.x = element_text(size = 16),
        panel.border=element_rect(color="grey90"),
        axis.ticks.x=element_blank(),
        axis.text.y = element_text(size=18)) +
 facet_wrap(~Species+Location+Organ, scales = "free_x", ncol=3, labeller=label_parsed)+
  labs(y="Sequence counts")+
    ylim(0, 900000)+
  geom_text(aes(label=value), position=position_dodge(width=0.8), width=0.25, size=4, hjust=-0.25, vjust=0.5, angle=90)+
  guides(fill=guide_legend(title="Read"))
## Warning: Ignoring unknown parameters: width
# afficher plot
p_count1
## Warning: position_dodge requires non-overlapping x intervals
## Warning: position_dodge requires non-overlapping x intervals

## Warning: position_dodge requires non-overlapping x intervals

## Warning: position_dodge requires non-overlapping x intervals

## Warning: position_dodge requires non-overlapping x intervals

## Warning: position_dodge requires non-overlapping x intervals

p_count2

# panels
p_group <- plot_grid(p_count1+theme(legend.position="none"), 
          p_count2+theme(legend.position="none"), 
          nrow=2, 
          ncol=1)+
    draw_plot_label(c("B1", "B2"), c(0, 0), c(1, 0.5), size = 20)
## Warning: position_dodge requires non-overlapping x intervals

## Warning: position_dodge requires non-overlapping x intervals

## Warning: position_dodge requires non-overlapping x intervals

## Warning: position_dodge requires non-overlapping x intervals

## Warning: position_dodge requires non-overlapping x intervals

## Warning: position_dodge requires non-overlapping x intervals
legend_plot <- get_legend(p_count1 + theme(legend.position="bottom"))
## Warning: position_dodge requires non-overlapping x intervals

## Warning: position_dodge requires non-overlapping x intervals

## Warning: position_dodge requires non-overlapping x intervals

## Warning: position_dodge requires non-overlapping x intervals

## Warning: position_dodge requires non-overlapping x intervals

## Warning: position_dodge requires non-overlapping x intervals
p_counts <- plot_grid(p_group, legend_plot, nrow=2, ncol=1, rel_heights = c(1, .1))
p_counts

Whole (the most abundant nodes)

guide_italics <- guides(fill = guide_legend(label.theme = element_text(size = 16, face = "italic", colour = "Black", angle = 0),
                                            nrow=2, byrow=TRUE))

# select whole
ps.filter.whole <- subset_samples(ps, Organ=="Whole")
ps.filter.whole <- prune_taxa(taxa_sums(ps.filter.whole) >= 1, ps.filter.whole)
ps.filter.whole <- prune_samples(sample_sums(ps.filter.whole) >= 1, ps.filter.whole)
ps.filter.whole
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 2 taxa and 26 samples ]
## sample_data() Sample Data:       [ 26 samples by 18 sample variables ]
## tax_table()   Taxonomy Table:    [ 2 taxa by 1 taxonomic ranks ]
# data pour plot
data_for_plot2 <- taxo_data_fast(ps.filter.whole, method = "abundance")
## Warning in `[<-.factor`(`*tmp*`, ri, value = "Other"): invalid factor level, NA
## generated
paste0("\n15 MOST ABUNDANT GENUS: \n") %>% cat()
## 
## 15 MOST ABUNDANT GENUS:
paste0("\"", levels(data_for_plot2$Name), "\",\n") %>% cat()
## "oligotype_A (27) | size:6013 / N1114 (27) | size:6013.",
##  "oligotype_G (23) | size:2498 / N1116 (23) | size:2498.",
##  "Other.",
new_names <- c("oligotype_A (27) | size:6013 / N1114 (27) | size:6013.",
               "oligotype_G (23) | size:2498 / N1116 (23) | size:2498."
)

data_for_plot2$Name <- factor(data_for_plot2$Name, levels = new_names)

col_add <- brewer.pal(8, "Accent")

col <- c("oligotype_A (27) | size:6013 / N1114 (27) | size:6013."="#B136F5",
         "oligotype_G (23) | size:2498 / N1116 (23) | size:2498."="#B863FF")

levels(data_for_plot2$Species)= c("Aedes aegypti"=make.italic("Aedes aegypti"),
               "Culex pipiens"=make.italic("Culex pipiens"),
               "Culex quinquefasciatus"=make.italic("Culex quinquefasciatus"))

levels(data_for_plot2$Location) <- c("Bosc", "Camping~Europe", "Guadeloupe", "Lavar~(lab)", expression(paste(italic("Wolbachia"), "- (Slab TC)")))

data_for_plot2 <- data_for_plot2 %>% na.omit()

p2 <- ggplot(data_for_plot2, aes(x = Sample, y = Relative_Abundance, fill = Name, species=Species, organ=Organ, location=Location))+ 
  geom_bar(position = "stack", stat = "identity")+
  scale_fill_manual(values = col)+
  theme_bw() +
  theme(axis.text.x = element_text(angle = 90, size=18, hjust=1, vjust=0.5)) +
  ggtitle("") + 
  guide_italics+
  theme(legend.title = element_text(size = 20), 
        legend.position="bottom",
        legend.text = element_text(size=14),
        #legend.key.height = unit(1, 'cm'),
        panel.spacing.y=unit(1, "lines"), 
        panel.spacing.x=unit(0.8, "lines"),
        panel.spacing=unit(0,"lines"),
        strip.background=element_rect(color="grey30", fill="grey90"),
        strip.text.x = element_text(size = 16),
        panel.border=element_rect(color="grey90"),
        axis.ticks.x=element_blank(),
        axis.text.y = element_text(size=18)) +
  facet_wrap(~Species+Location+Organ, scales = "free", ncol=3, labeller=label_parsed)+
  labs(x="Sample", y="Relative abundance", fill="Oligotype / MED node")

p2

Ovary (the most abundant nodes)

# select ovary
ps.filter.ovary <- subset_samples(ps, Organ=="Ovary")
ps.filter.ovary <- prune_taxa(taxa_sums(ps.filter.ovary) >= 1, ps.filter.ovary)
ps.filter.ovary <- prune_samples(sample_sums(ps.filter.ovary) >= 1, ps.filter.ovary)
ps.filter.ovary
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 1 taxa and 1 samples ]
## sample_data() Sample Data:       [ 1 samples by 18 sample variables ]
## tax_table()   Taxonomy Table:    [ 1 taxa by 1 taxonomic ranks ]
ps.filter.ovary@otu_table # only 1 oligotype
## OTU Table:          [1 taxa and 1 samples]
##                      taxa are rows
##                                                            S104
## oligotype_A (27) | size:6013 / node_N1114 (27) | size:6013    4
a <- data.frame(ps.filter.ovary@sam_data)
b <- data.frame(ps.filter.ovary@tax_table)
count_table <- data.frame(ps.filter.ovary@otu_table)

count_table %>%
  kbl() %>%
  kable_paper("hover", full_width = F)
S104
oligotype_A (27) | size:6013 / node_N1114 (27) | size:6013 4
a$Relative_Abundance <- 1.0000000000
a$Name <- paste0(b$ref,".")

levels(a$Species)= c("Aedes aegypti"=make.italic("Aedes aegypti"),
               "Culex pipiens"=make.italic("Culex pipiens"),
               "Culex quinquefasciatus"=make.italic("Culex quinquefasciatus"))

levels(a$Location) <- c("Bosc", "Camping~Europe", "Guadeloupe", "Lavar~(lab)", expression(paste(italic("Wolbachia"), "- (Slab TC)")))

p3 <- ggplot(a, aes(x = Sample, y = Relative_Abundance, fill = Name, species=Species, organ=Organ, location=Location))+
  geom_bar(position = "stack", stat = "identity")+
  scale_fill_manual(values = "#B136F5")+
  theme_bw() +
  theme(axis.text.x = element_text(angle = 90)) +
  ggtitle("") +
  guide_italics+
  theme(legend.title = element_text(size = 20), 
        legend.position="bottom",
        legend.text = element_text(size=14),
        #legend.key.height = unit(1, 'cm'),
        panel.spacing.y=unit(1, "lines"), 
        panel.spacing.x=unit(0.8, "lines"),
        panel.spacing=unit(0,"lines"),
        strip.background=element_rect(color="grey30", fill="grey90"),
        strip.text.x = element_text(size = 16),
        panel.border=element_rect(color="grey90"),
        axis.ticks.x=element_blank(),
        axis.text.y = element_text(size=18)) +
 facet_wrap(~Species+Location+Organ, scales = "free", ncol=3, labeller=label_parsed)+
  labs(x="Sample", y="Relative abundance", fill="Oligotype / MED Node")

p3

Panels taxonomy of whole / ovary

legend_plot <- get_legend(p2 + theme(legend.position="bottom"))

# panels
p_group <- plot_grid(p2+theme(legend.position="none"), 
          p3+theme(legend.position="none"), 
          nrow=2, 
          ncol=1)+
    draw_plot_label(c("A1", "A2"), c(0, 0), c(1, 0.5), size = 20)

p_taxo <- plot_grid(p_group, legend_plot, nrow=2, rel_heights = c(1, .1))
p_taxo

Save taxonomic plot

setwd(path_plot)

tiff("2Dg_OLIGO_counts_serratia.tiff", units="in", width=20, height=18, res=300)
p_counts
dev.off()
## quartz_off_screen 
##                 2
tiff("2Dg_OLIGO_taxonomic_serratia_whole.tiff", units="in", width=16, height=12, res=300)
p2
dev.off()
## quartz_off_screen 
##                 2
tiff("2Dg_OLIGO_taxonomic_serratia_ovary.tiff", units="in", width=18, height=14, res=300)
p3
dev.off()
## quartz_off_screen 
##                 2
tiff("2Dg_OLIGO_taxonomic_serratia.tiff", units="in", width=18, height=16, res=300)
p_taxo
dev.off()
## quartz_off_screen 
##                 2
png("2Dg_OLIGO_counts_serratia_big.png", units="in", width=20, height=18, res=300)
p_counts
dev.off()
## quartz_off_screen 
##                 2
png("2Dg_OLIGO_counts_serratia_small.png", units="in", width=18, height=14, res=300)
p_counts
dev.off()
## quartz_off_screen 
##                 2
png("2Dg_OLIGO_taxonomic_serratia_whole.png", units="in", width=16, height=12, res=300)
p2
dev.off()
## quartz_off_screen 
##                 2
png("2Dg_OLIGO_taxonomic_serratia_ovary.png", units="in", width=18, height=14, res=300)
p3
dev.off()
## quartz_off_screen 
##                 2
png("2Dg_OLIGO_taxonomic_serratia_big.png", units="in", width=18, height=18, res=300)
p_taxo
dev.off()
## quartz_off_screen 
##                 2
png("2Dg_OLIGO_taxonomic_serratia_small.png", units="in", width=18, height=14, res=300)
p_taxo
dev.off()
## quartz_off_screen 
##                 2

Make main plot

setwd(paste0(path_oligo,"/serratia/oligotyping_Serratia_sequences-c1-s1-a0.0-A0-M10/HTML-OUTPUT"))

img <- magick::image_read("entropy.png")
p_entropy <- magick::image_ggplot(img, interpolate = TRUE)
p_entropy+ theme(plot.margin = unit(c(-7,-2.5,-7,-0.5), "cm"))

p_entropy+ theme(plot.margin=unit(c(-7,-2,-12,-5), "mm"))

aligned <- plot_grid(p_taxo, 
                     p_counts, 
                     align="hv")

aligned

p_entropy2 <- plot_grid(p_entropy, nrow=1)+
  draw_plot_label(c("C"), c(0), c(1), size=20, hjust=-0.5)

p_entropy2

t_plot <- plot_grid(aligned, 
                    p_entropy2,
                    nrow=2, 
                    ncol=1, 
                    scale=1,
                    rel_heights=c(2,1))

t_plot

setwd(path_plot)

tiff("2Dg_OLIGO_main_serratia.tiff", width=36, height=36, res=300, units="in")
t_plot
dev.off()
## quartz_off_screen 
##                 2
png("2Dg_OLIGO_main_serratia.png", width=36, height=36, res=300, units="in")
t_plot
dev.off()
## quartz_off_screen 
##                 2